
### ################################################################################
### Takes in two sources of data (`norris_data_v2.xls` and `post_2012_by_elections.csv`)
### Tidies them up
### Spits out the result as `tidy_by_elex_data.rds`
### ################################################################################

here::i_am("newR/01_tidy_byelex.R")

library(rio)
library(tidyverse)
library(here)

### Handle the Norris data
### Read sheet 2 of the workbook, skipping the first three rows and treating
### "." and "NA" as missing. Rows with no constituency name and the "MEAN ..."
### summary rows are dropped.
### The raw columns are then renamed: left-hand side is the tidy name, right-hand
### side is the column name as imported (the `...N` suffixes appear to come from
### name repair of blank/duplicated headers -- confirm against the workbook).
### Finally every `*_change` column is discarded.
dat <- rio::import(here::here("data", "norris_data_v2.xls"),
                   which = 2, skip = 3,
                   na = c(".", "NA")) %>%
    filter(!is.na(Name), !grepl("^MEAN ", Name)) %>%
    dplyr::select(all_of(c(
        id = "No.",
        Name = "Name",
        ## Date of the by-election, split over three columns
        DD = "...3",
        MM = "...4",
        YYYY = "...5",
        Region = "Code...6",
        Govt = "Code...7",
        Counter = "No",
        ## Seat holder and majority
        MP_at_GE = "GE...9",
        MP_at_BE = "BE...10",
        SeatChange = "Ch...11",
        Maj_at_GE = "GE...12",
        ## Turnout
        Turn_at_GE = "GE...13",
        Turn_at_BE = "BE...14",
        Turn_change = "Ch...15",
        ## Party columns: general election, by-election, change
        Con_GE = "GE...16",
        Con_BE = "Be...17",
        Con_change = "Ch...18",
        Lab_GE = "Ge...19",
        Lab_BE = "Be...20",
        Lab_change = "Ch...21",
        Lib_GE = "Ge...22",
        Lib_BE = "Be...23",
        Lib_change = "Ch...24",
        SDP_addon = "...25",
        SNP_GE = "GE...26",
        SNP_BE = "BE...27",
        SNP_change = "Ch...28",
        PC_GE = "GE...29",
        PC_BE = "BE...30",
        PC_change = "Ch...31",
        Oth_GE = "GE...32",
        Oth_BE = "BE...33",
        Oth_change = "Ch...34"
    ))) %>%
    dplyr::select(-ends_with("_change"))

### Tidy up some of the parties, add information on candidacy, govt
### Values of the `Govt` code that are flagged as Conservative / Labour
### governments below. Code 16 is in `con_govts` and is also the code used
### for `LibGovt` (presumably the 2010 coalition -- confirm).
con_govts <- c(2, 3, 4, 7, 9, 10, 11, 12, 16)
lab_govts <- c(1, 5, 6, 8, 13:15)

dat <- dat %>%
    ## Force every general-election / by-election column to numeric.
    ## NOTE(review): ends_with() ignores case by default, so "GE" also
    ## matches `SeatChange`. That column is not part of the final output;
    ## confirm whether it should be excluded here.
    mutate(across(c(ends_with("GE"), ends_with("BE")), as.numeric)) %>%
    ## Candidacy flags (stored as 0/1): a party counts as having stood when
    ## its share is present and strictly positive. `Con_BE` gives
    ## `ConCand_BE`, and so on for each party and election.
    mutate(across(c(Con_BE, Lab_BE, Lib_BE, SNP_BE, PC_BE, Oth_BE,
                    Con_GE, Lab_GE, Lib_GE, SNP_GE, PC_GE, Oth_GE),
                  ~ as.numeric(!is.na(.x) & .x > 0),
                  .names = "{sub('_', 'Cand_', .col)}")) %>%
    ## Assemble the by-election date from its three component columns.
    mutate(Date = as.Date(paste(YYYY, MM, DD, sep = "."),
                          format = "%Y.%m.%d")) %>%
    dplyr::select(-YYYY, -DD, -MM) %>%
    ## Region codes 11 and 12 are Scotland and Wales; every other code is
    ## labelled England.
    mutate(Nation = dplyr::recode(Region,
                                  `11` = "Scotland",
                                  `12` = "Wales",
                                  .default = "England")) %>%
    ## Missing values become zero.
    ## NOTE(review): because the selection is by suffix, this also turns
    ## missing `Turn_at_GE`, `Turn_at_BE`, `Maj_at_GE` and `MP_at_*` into 0,
    ## and `Turn_at_GE` is written to the output -- confirm that is intended.
    mutate(across(c(ends_with("GE"), ends_with("BE")), ~ coalesce(.x, 0))) %>%
    mutate(SDP_addon = coalesce(SDP_addon, 0)) %>%
    mutate(
        ## Nationalists = SNP + Plaid Cymru
        Nat_GE = SNP_GE + PC_GE,
        Nat_BE = SNP_BE + PC_BE,
        NatCand_BE = (SNPCand_BE | PCCand_BE),
        NatCand_GE = (SNPCand_GE | PCCand_GE),
        ## The SDP add-on is folded into the by-election "Other" share
        Oth_BE = Oth_BE + SDP_addon,
        ## Incumbency, from the code of the party holding the seat at the
        ## general election
        ConInc = MP_at_GE == 1,
        LabInc = MP_at_GE == 2,
        LibInc = MP_at_GE == 3,
        NatInc = MP_at_GE %in% c(4, 5),
        OthInc = FALSE,
        ## Party-in-government flags from the `Govt` code
        ConGovt = Govt %in% con_govts,
        LabGovt = Govt %in% lab_govts,
        LibGovt = Govt == 16
    )

### Manual amendments
## dat$Date[which(dat$id == 414)] <- as.Date("2000-11-23")
## dat$Date[which(dat$id == 319)] <- as.Date("1978-05-31")
dat$Nation[which(dat$id == 121)] <- "England"
### There's an error in Glasgow Bridgeton, 1946
### dat[which(dat$id == 18), c("SNP_BE", "Oth_BE")] <- c(13.9, 34.3)

### Manually amend the result of Glasgow North East in the previous
### election to make Michael Martin's vote share a Labour vote share.
### Each column is assigned separately so the right value lands in the right
### column however many rows match (none, one, or several).
gne_rows <- which(dat$id == 435)
dat$Lab_GE[gne_rows] <- 53.3
dat$Oth_GE[gne_rows] <- 29.0
### The candidacy flags were derived from the vote shares before this
### amendment, so keep the Labour flag in step with the amended share
### (a positive share means a candidate stood). `OthCand_GE` needs no
### attention here because it is recomputed further down.
dat$LabCand_GE[gne_rows] <- 1

### Add on a sum to "Other" which will bring the total up to 100 for
### both the BE and the GE
### `under_*` is whatever share is not accounted for by the six party columns;
### `Oth2_*` is the "Other" share with that remainder added. Note the remainder
### is negative whenever the recorded shares add up to more than 100.
###
### Overwrite "Other cand"
### The flags are recomputed from `Oth2_*`. Because `Oth2_*` is the result of
### floating-point arithmetic, a row whose shares add up to 100 can leave a
### residual of the order of 1e-14 rather than an exact zero, so the test is
### made against a small tolerance instead of against 0.
dat <- dat %>%
    mutate(under_BE = 100 - (Con_BE + Lab_BE + Lib_BE + Oth_BE + SNP_BE + PC_BE),
           under_GE = 100 - (Con_GE + Lab_GE + Lib_GE + Oth_GE + SNP_GE + PC_GE),
           Oth2_BE = Oth_BE + under_BE,
           Oth2_GE = Oth_GE + under_GE,
           OthCand_GE = !is.na(Oth2_GE) & Oth2_GE > sqrt(.Machine$double.eps),
           OthCand_BE = !is.na(Oth2_BE) & Oth2_BE > sqrt(.Machine$double.eps))

## Add on information about post-2012 by-elections
## Read the CSV, keep the identifying columns, every `*_GE` / `*_BE` column and
## the incumbency flags, and bring it into the same shape as `dat`.
post_2012 <- read.csv(here::here("data",
                                 "post_2012_by_elections.csv")) %>%
    dplyr::select(id = RefNo,
                  Name = ConstituencyName,
                  YYYY, MM, DD,
                  Nation,
                  ends_with("_GE"),
                  ends_with("_BE"),
                  ConInc, LabInc, LibInc, NatInc, OthInc) %>%
    ## Build the date with an explicit format, as is done for the Norris data,
    ## so an unparseable date becomes NA instead of depending on as.Date()'s
    ## format guessing.
    mutate(Date = as.Date(paste(YYYY, MM, DD, sep = "/"),
                          format = "%Y/%m/%d")) %>%
    dplyr::select(-YYYY, -MM, -DD) %>%
    ## Candidacy flags (stored as 0/1): here a party counts as having stood
    ## whenever its share is not missing. `Con_BE` gives `ConCand_BE`, etc.
    ## NOTE(review): the Norris block additionally requires the share to be
    ## > 0. If this CSV records "no candidate" as 0 rather than as a blank,
    ## these flags will be wrong -- confirm against the file.
    mutate(across(c(Con_BE, Lab_BE, Lib_BE, Nat_BE, Oth_BE,
                    Con_GE, Lab_GE, Lib_GE, Nat_GE, Oth_GE),
                  ~ as.numeric(!is.na(.x)),
                  .names = "{sub('_', 'Cand_', .col)}")) %>%
    ## Missing values in the GE / BE columns become zero.
    mutate(across(c(ends_with("GE"), ends_with("BE")), ~ coalesce(.x, 0))) %>%
    ## Government flags are hard-coded: Conservatives always in government,
    ## Labour never, Liberal Democrats only before 2015-05-07.
    ## NOTE(review): this assumes no row in the CSV falls under a later change
    ## of government -- revisit if the file is extended.
    mutate(ConGovt = TRUE,
           LabGovt = FALSE,
           LibGovt = Date < as.Date("2015-05-07"))

### Combine the two sources and keep the analysis columns, in a fixed order.
### No `by` is given, so merge() keys on every column name the two data
### frames share; `all = TRUE` keeps rows from either side that find no match.
### Columns that exist in only one source are NA for rows from the other.
comb <- merge(dat, post_2012, all = TRUE) %>%
    dplyr::select(all_of(c(
        "id", "Name", "Turn_at_GE",
        ## Vote shares
        "Con_GE", "Con_BE",
        "Lab_GE", "Lab_BE",
        "Lib_GE", "Lib_BE",
        "Oth_GE", "Oth_BE",
        "Oth2_GE", "Oth2_BE",
        "Nat_GE", "Nat_BE",
        ## Candidacy flags
        "ConCand_BE", "ConCand_GE",
        "LabCand_BE", "LabCand_GE",
        "LibCand_BE", "LibCand_GE",
        "NatCand_BE", "NatCand_GE",
        "OthCand_BE", "OthCand_GE",
        "Date", "Nation",
        ## Incumbency and government flags
        "ConInc", "LabInc", "LibInc", "NatInc", "OthInc",
        "ConGovt", "LabGovt", "LibGovt"
    )))

### Write the combined data to `working/tidy_by_elex_data.rds` under the
### project root. The output directory is created first if it does not exist
### yet (for example on a fresh checkout), since saveRDS() will not create it.
out_file <- here("working", "tidy_by_elex_data.rds")
dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
saveRDS(comb,
        file = out_file)
